In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.tree import DecisionTreeRegressor

# Load data
train_data = pd.read_csv('/content/drive/MyDrive/colab_notebooks/Data/main_dataset.csv')
train_data['Id'] = np.where(train_data['Id'] < 1e-18, 1e-18, train_data['Id'])
train_data['Log_Id'] = np.log10(train_data['Id'])
X = train_data[['Tox', 'Nc', 'Nd', 'Ns', 'Vds', 'Vgs']]
y = train_data['Log_Id']

# Polynomial features and scaling
poly = PolynomialFeatures(degree=3, include_bias=False)
X_poly = poly.fit_transform(X)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_poly)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Define hyperparameter grid for DecisionTreeRegressor
param_grid = {
    'max_depth': [3, 5, 10, 15,None],
    'min_samples_split': [2, 5, 10, 50, 100],
    'min_samples_leaf': [1, 2, 4, 10],
    'max_features': [None, 'sqrt', 'log2'],
}

# DataFrame to store results
results_df = pd.DataFrame(columns=['max_depth', 'min_samples_split', 'min_samples_leaf', 'max_features',  'R2', 'MAE'])


random_search = RandomizedSearchCV(
    DecisionTreeRegressor(random_state=42),
    param_grid,
    n_iter=40,
    random_state=42,
    n_jobs=-1,
    verbose = 5,
    cv = 3,
)

random_search.fit(X_train, y_train)
Fitting 3 folds for each of 40 candidates, totalling 120 fits
No description has been provided for this image
No description has been provided for this image
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-7-d7f75b402af4> in <cell line: 49>()
     91     plt.show()
     92 
---> 93     current_result = pd.DataFrame({
     94         'max_depth': params['max_depth'],
     95         'min_samples_split': params['min_samples_split'],

/usr/local/lib/python3.10/dist-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
    776         elif isinstance(data, dict):
    777             # GH#38939 de facto copy defaults to False only in non-dict cases
--> 778             mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager)
    779         elif isinstance(data, ma.MaskedArray):
    780             from numpy.ma import mrecords

/usr/local/lib/python3.10/dist-packages/pandas/core/internals/construction.py in dict_to_mgr(data, index, columns, dtype, typ, copy)
    501             arrays = [x.copy() if hasattr(x, "dtype") else x for x in arrays]
    502 
--> 503     return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)
    504 
    505 

/usr/local/lib/python3.10/dist-packages/pandas/core/internals/construction.py in arrays_to_mgr(arrays, columns, index, dtype, verify_integrity, typ, consolidate)
    112         # figure out the index, if necessary
    113         if index is None:
--> 114             index = _extract_index(arrays)
    115         else:
    116             index = ensure_index(index)

/usr/local/lib/python3.10/dist-packages/pandas/core/internals/construction.py in _extract_index(data)
    665 
    666     if not indexes and not raw_lengths:
--> 667         raise ValueError("If using all scalar values, you must pass an index")
    668 
    669     if have_series:

ValueError: If using all scalar values, you must pass an index
In [15]:
# Loop through each hyperparameter set
for params in random_search.cv_results_['params']:
    # Set up the model with the current parameters
    model = DecisionTreeRegressor(random_state=42, **params)
    model.fit(X_train, y_train)
    print(params)
    # Test on the first test dataset
    y_pred_test = model.predict(X_test)
    r2 = r2_score(y_test, y_pred_test)
    mae = mean_absolute_error(y_test, y_pred_test)

    # Load and preprocess the second test data
    test_data = pd.read_csv('/content/drive/MyDrive/colab_notebooks/Data/test_data.csv').iloc[0:203]
    test_data['Id'] = np.where(test_data['Id'] < 1e-18, 1e-18, test_data['Id'])
    test_data['Log_Id'] = np.log10(test_data['Id'])
    X_test_1 = test_data[['Tox', 'Nc', 'Nd', 'Ns', 'Vds', 'Vgs']]
    y_test_1 = test_data['Log_Id']
    X_test_1_transformed = poly.transform(X_test_1)
    X_test_1_scaled = scaler.transform(X_test_1_transformed)

    # Predict on the second test dataset and evaluate
    y_pred_test_1 = model.predict(X_test_1_scaled)
    r2_test_1 = r2_score(y_test_1, y_pred_test_1)
    mae_test_1 = mean_absolute_error(y_test_1, y_pred_test_1)

    # Log scale plot
    plt.figure(figsize=(10, 5))
    plt.plot(X_test_1['Vgs'], y_pred_test_1, color="green", label="Predicted")
    plt.plot(X_test_1['Vgs'], y_test_1, color="blue", label="Actual")
    plt.title(f'Vgs vs Id (Log scale) - Params: {params}')
    plt.xlabel('Vgs')
    plt.ylabel('Log10(Id)')
    plt.legend()
    plt.show()

    # Linear scale plot
    plt.figure(figsize=(10, 5))
    plt.plot(X_test_1['Vgs'], np.maximum(np.power(10, y_pred_test_1), 1e-18), color="green", label="Predicted")
    plt.plot(X_test_1['Vgs'], np.maximum(np.power(10, y_test_1), 1e-18), color="blue", label="Actual")
    plt.title(f'Vgs vs Id (Linear scale) - Params: {params}')
    plt.xlabel('Vgs')
    plt.ylabel('Id')
    plt.legend()
    plt.show()

    current_result = pd.DataFrame({
        'max_depth': params['max_depth'],
        'min_samples_split': params['min_samples_split'],
        'min_samples_leaf': params['min_samples_leaf'],
        'max_features': params['max_features'],
        'R2': r2_test_1,
        'MAE': mae_test_1
    }, index = [0])

    # Save to DataFrame

    results_df = pd.concat([results_df, current_result], ignore_index=True)

    print("-----------\n--------\n----------\n----------")
# Display results
print(results_df)
{'min_samples_split': 50, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 15}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': None}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_depth': 10}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 100, 'min_samples_leaf': 2, 'max_features': None, 'max_depth': 3}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 50, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 15}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 15}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 5, 'min_samples_leaf': 10, 'max_features': None, 'max_depth': 15}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 100, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 5}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': None, 'max_depth': 3}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 2, 'min_samples_leaf': 10, 'max_features': 'log2', 'max_depth': 10}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 10, 'min_samples_leaf': 10, 'max_features': 'log2', 'max_depth': 15}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 10, 'min_samples_leaf': 10, 'max_features': 'log2', 'max_depth': 3}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 50, 'min_samples_leaf': 10, 'max_features': 'sqrt', 'max_depth': 15}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 3}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 10, 'min_samples_leaf': 1, 'max_features': None, 'max_depth': 15}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'log2', 'max_depth': 15}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 100, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': None}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_depth': 15}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 50, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 10}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 10}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 50, 'min_samples_leaf': 10, 'max_features': None, 'max_depth': 5}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 50, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 5}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 100, 'min_samples_leaf': 2, 'max_features': None, 'max_depth': None}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': None, 'max_depth': None}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 100, 'min_samples_leaf': 1, 'max_features': 'log2', 'max_depth': 5}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 10, 'min_samples_leaf': 1, 'max_features': 'log2', 'max_depth': 3}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'log2', 'max_depth': None}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 2, 'min_samples_leaf': 10, 'max_features': 'log2', 'max_depth': None}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 10, 'min_samples_leaf': 10, 'max_features': 'sqrt', 'max_depth': 10}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 50, 'min_samples_leaf': 10, 'max_features': 'log2', 'max_depth': 15}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 10, 'min_samples_leaf': 10, 'max_features': None, 'max_depth': 3}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 100, 'min_samples_leaf': 1, 'max_features': 'log2', 'max_depth': 10}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 50, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_depth': 3}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 100, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 3}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 2, 'min_samples_leaf': 10, 'max_features': 'sqrt', 'max_depth': 15}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 100, 'min_samples_leaf': 10, 'max_features': 'log2', 'max_depth': 5}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': None, 'max_depth': 3}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_depth': 5}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 3}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
{'min_samples_split': 50, 'min_samples_leaf': 4, 'max_features': None, 'max_depth': 5}
No description has been provided for this image
No description has been provided for this image
-----------
--------
----------
----------
    max_depth min_samples_split min_samples_leaf max_features  \
0          15                50                1         sqrt   
1        None                 5                2         sqrt   
2          10                10                4         sqrt   
3           3               100                2         None   
4          15                50                4         log2   
..        ...               ...              ...          ...   
126         5               100               10         log2   
127         3                10                2         None   
128         5                 2                4         sqrt   
129         3                 5                2         log2   
130         5                50                4         None   

    min_impurity_decrease        R2       MAE  
0                     NaN  0.985844  0.291017  
1                     NaN  0.984648  0.289155  
2                     NaN  0.968032  0.528280  
3                     NaN  0.905205  0.926902  
4                     NaN  0.985877  0.305633  
..                    ...       ...       ...  
126                   NaN  0.905308  1.016112  
127                   NaN  0.905205  0.926902  
128                   NaN  0.985791  0.383561  
129                   NaN  0.193860  2.536882  
130                   NaN  0.977076  0.388789  

[131 rows x 7 columns]
In [3]:
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
In [11]:
print(results_df)
results_df.to_csv('/content/dt_hyp_tuning_results.csv')
   max_depth min_samples_split min_samples_leaf max_features  \
0         15                50                1         sqrt   
1       None                 5                2         sqrt   
2         10                10                4         sqrt   
3          3               100                2         None   
4         15                50                4         log2   
..       ...               ...              ...          ...   
66         5               100               10         log2   
67         3                10                2         None   
68         5                 2                4         sqrt   
69         3                 5                2         log2   
70         5                50                4         None   

   min_impurity_decrease        R2       MAE  
0                    NaN  0.985844  0.291017  
1                    NaN  0.984648  0.289155  
2                    NaN  0.968032  0.528280  
3                    NaN  0.905205  0.926902  
4                    NaN  0.985877  0.305633  
..                   ...       ...       ...  
66                   NaN  0.905308  1.016112  
67                   NaN  0.905205  0.926902  
68                   NaN  0.985791  0.383561  
69                   NaN  0.193860  2.536882  
70                   NaN  0.977076  0.388789  

[71 rows x 7 columns]